clear all
set more off, perm
set maxvar 120000
set varabbrev off
* ---------------------------------------------- *
global dir 	"`1'"
global Data 	$dir/Data
global Tables 	$dir/Tables
global Figures 	$dir/Figures
global Work 	$dir/Work
global Temp 	$dir/Temp
global Pseudo	$dir/Work/Temp
* ---------------------------------------------- *
* [1] Get CRSP
* figure out whether security is common stock
import sas $Data/CRSP/stocknames.sas7bdat, clear case(lower)
keep permno permco comnam namedt nameenddt siccd shrcd exchcd 
sort permco permno nameenddt

* find end-of-the-month day for the last record of each stock
by permco permno:  gen obs=_n
by permco permno:  gen tot=_N
replace nameenddt=mdy(12,31,year(nameenddt)) 			if month(nameenddt)==12 & obs==tot
replace nameenddt=mdy(month(nameenddt)+1,1,year(nameenddt))-1 	if month(nameenddt)<=11 & obs==tot

drop obs tot

tempfile header
save "`header'", replace
/* ********************************************************************* */
import sas $Data/CRSP/msf.sas7bdat, clear case(lower)
keep permno permco date prc shrout ret retx

joinby permco permno using "`header'"
keep if date>=namedt & date<=nameenddt

* some duplicates because --for example-- different siccd/comnam
duplicates drop permco permno shrcd prc date ret retx, force

* sanity check: no duplicates
duplicates tag permco permno date, gen(dup)
tabulate dup

keep if shrcd>=10 & shrcd<=11
keep if exchcd>0  & exchcd<4

gen mcap=abs(prc)*shrout
keep if mcap>0

gen datem=mofd(date)
format datem %tm

tsset permno datem
forvalues x=1(1)12 {
	gen ret`x'=F`x'.ret
}	
order *, sequential
* When there are multiple securities, take the average return and permno & exchcd for the security with the largest market cap
sort permco date mcap 
collapse (sum) mcap (mean) ret ret1-ret12 (lastnm) permno exchcd date, by(permco datem)

tsset permco datem
gen L6mcap=L6.mcap

save $Temp/crsp, replace
/* ********************************************************************* */
* [2] LOAD ANNUAL COMPUSTAT
* wrds library: /wrdslin/comp/sasdata/d_na/
import sas using $Data/Compustat/funda.sas7bdat, case(lower) clear
su gvkey datadate at pstkl txditc pstkrv seq pstk indfmt datafmt popsrc consol revt sale cogs xsga xint mibt

keep if indfmt=="INDL"
keep if datafmt=="STD"
keep if popsrc=="D" 
keep if consol=="C" 
keep if datadate>=mdy(1,1,1959)
drop if index(curcd,"CAD")
format datadate %td
* *********************************
* compute value of preferred stock
* pstkrv: redemption value 
* pstkl:  liquidating value 
* pstk:   carrying value 

egen 	pfd=rowfirst(pstkrv pstkl pstk)
replace pfd=0 if pfd==.

* value of common equity
generate steq=seq				/* Stockholders Equity */
replace  steq=ceq+pfd		if steq==.	/* Common/Ordinary Equity + Preferred  */
replace  steq=at-lt		if steq==.	/* assets minus liabilities */

* txditc: taxes and investment tax credit
egen txdbitcb=rowtotal(txdb itcb)		/* deferred taxes + investment tax credit; "rowtotal" treats missing as 0 */
egen dtax=rowfirst(txditc txdbitcb)

* Ken writes on his website: "Because of changes in the treatment of deferred taxes described in FASB 109, files produced after August 2016 no longer add Deferred Taxes and Investment 
* Tax Credit to BE for fiscal years ending in 1993 or later."
replace dtax=0 if fyear>=1993

gen bookeq=steq + dtax -pfd

* clean surplus accouting
sort gvkey datadate
by gvkey: replace  bookeq=bookeq[_n-1]+ib-dvc if gvkey[_n]==gvkey[_n-1] & year(datadate)==year(datadate[_n-1])+1 & bookeq==.

* compute earnings before extraordinary items
generate street=ni - xido - spi - nrtxt
replace  street=ib		if street==.
replace  street=ni  		if street==.
gen  year = year(datadate)
/* ********************************************************************* */
label var bookeq "Book value of equity (annual)"
/* ************************* */
* Ken's website: In August 2018, we have revised the method for computing Operating Profitability. We now include minority interest in the denominator, so the operating profitability ratio used to form portfolios in June of year t is annual revenues minus cost of goods sold, interest expense, and selling, general, and administrative expense divided by the sum of book equity and minority interest for the last fiscal year ending in t-1. 
egen cost=rowtotal(cogs xsga xint), missing
gen op=(revt-cost)/bookeq	if bookeq>0
replace mibt=0 if mibt==.
replace op=(revt-cogs-xsga-xint)/(bookeq+mibt)	if bookeq+mibt>0 & datadate>mdy(8,1,2018)
/* ************************* */
* Investment portfolios: Formed on the change in total assets from the FY ending in year t-2 to the FY ending in t-1, divided by t-2 total assets; using NYSE breakpoints
encode gvkey, gen(id)
gen datem=mofd(datadate)
format datem %tm
tsset  id datem
gen inv=(at/L12.at)-1			if at>0 & L12.at>0
/* ************************* */
keep gvkey datadate conm sich year fyear at sale bookeq intan gdwl inv csho ib ni street revt cogs xsga xint xrd op cost  
save $Temp/tmp, replace
/* ********************************************************************* */
* http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/Data_Library/det_historical_be_data.html
import excel $Data/Compustat/DFF_BE_With_Nonindust.xlsx, sheet("DFF_BE_With_Nonindust") firstrow clear
drop lst_year_Moody fst_year_Moody
reshape long bookequity, i(permno) j(year)
replace bookequity=. if bookequity==-99.99
keep if bookequity<.
rename bookequity book_davis
gen datem=mofd(mdy(6,30,year))
format datem %tm
* assume december fiscal year
gen datadate=mdy(12,31,year-1)
format datadate %td
drop year
* add permcos and get rid of stuff that trades outside of NYSE/ASE/NASDAQ; restrict to common stock
merge 1:1 permno datem using $Temp/crsp, keepusing(permco permno datem)
keep if _merge==3
drop _merge
save $Temp/davis, replace 
/* ********************************************************************* */
import sas using $Data/CRSP/ccmxpf_linktable.sas7bdat, case(lower) clear
keep if  index(linktype,"L") 
keep if (index(linkprim,"P") |  index(linkprim,"C"))

joinby gvkey using $Temp/tmp
keep if datadate>=linkdt & datadate<=linkenddt
rename lpermno permno
rename lpermco permco

* keep linkprim='P' for selecting just one gvkey-permno-date combination 
* P = Primary, identified by Compustat in monthly security data
duplicates tag permno datadate year, gen(dup)
tabulate dup
drop if dup>0 & index(linkprim,"P")==0
drop dup

/* Some companies change fiscal year end in the middle of the calendar year */
/* In these cases, there are more than one annual record for accounting data */
/* We select the last annual record in a given calendar year      */
sort permno year datadate
by   permno year: keep if _n==_N

duplicates tag permno datadate, gen(dup)
tabulate dup
drop dup
* Below i am following WRDS.
* "datadate" is the date when COMPUSTAT (last) updated the dataset.
* In practice, "fyear" and datadate are the same for 90% of observations. 
* For the other 10% of obs, fyear is 12 months earlier than datadate.  
gen datem=mofd(mdy(6,30,year(datadate)+1))
format datem %tm
/* ********************************************************************* */
merge 1:1 permno datem using $Temp/davis
drop _merge

* put mcap in June(t) and Dec(t-1)
merge 1:1 permno permco datem using $Temp/crsp
keep if _merge==3
drop _merge
tsset permno datem
rename mcap size
replace op =(revt-cost)/book_davis	if bookeq==. & book_davis>0

generate beme=1000*bookeq/L6mcap	if bookeq>0
replace  beme=1000*book_davis/L6mcap 	if bookeq==. & book_davis>0
order permco permno datadate datem conm beme bookeq size inv op

sort gvkey datem
by gvkey: gen count=_n
save $Data/read_compustat, replace
/* ********************************************************************* */
!find $Temp -type f -delete
